* Start from the analysis data set, discarding whatever is currently in memory.
use analysis.dta, clear





* Ten-fold cross-validated predictions. Runs only when the global do_ten_fold is 1.
*
* Observations are shuffled with a fixed seed and split into ten folds (k = 0..9).
* For every fold, each logit model is fit on the other nine folds and its predicted
* probability is stored for the held-out fold. Two outcomes are modelled
* (gets_early_stage and growth), each with four specifications:
*   pred_<outcome>         interactions of the nov_ scores
*   pred_<outcome>_tfidf   interactions of the tfidf_ scores
*   pred_<outcome>_hp_ind  hp_industry indicators only
*   pred_<outcome>_all     hp_industry indicators plus the nov_ interactions
* The data set, including the new prediction variables, is saved over analysis.dta.
*
* The global is read through real() so that an undefined or empty do_ten_fold
* simply skips the block instead of expanding to a syntax error.
if real("$do_ten_fold") == 1 {
	clear
	use analysis.dta

	set seed 12345

	* Random fold assignment: shuffle rows, then cut the shuffled order into ten
	* equal-sized groups numbered 0 to 9.
	safedrop rsort
	gen rsort = runiform()
	sort rsort
	safedrop k
	gen k = floor((_n-1)/_N*10)

	* NOTE(review): roc_scores is opened here but nothing in this block posts to it
	* or closes it. Confirm whether later code uses the handle; otherwise remove it.
	capture postclose roc_scores

	postfile roc_scores  year str30 model roc_score N using roc_score.dta, every(1) replace

	* Containers for the out-of-fold predictions, filled one fold at a time.
	safedrop pred_early_stage pred_growth pred_early_stage_tfidf pred_growth_tfidf pred_early_stage_hp_ind pred_growth_hp_ind pred_early_stage_all pred_growth_all
	gen pred_early_stage = .
	gen pred_growth = .
	gen pred_early_stage_tfidf = .
	gen pred_growth_tfidf = .
	gen pred_early_stage_hp_ind = .
	gen pred_growth_hp_ind = .
	gen pred_early_stage_all = .
	gen pred_growth_all = .

	forvalues k = 0/9 {

		di "fold `k'"
		capture drop xxx
		capture drop yyy

		di "	Early Stage Models"
		quietly {
			* Word-embedding differentiation scores only.
			logit gets_early_stage c.nov_startup_5##c.nov_startup_1##c.nov_public_firm_5##c.nov_public_firm_1 if k != `k'
			predict xxx
			replace pred_early_stage = xxx if k == `k'

			* TF-IDF differentiation scores only.
			capture drop xxx
			logit gets_early_stage c.tfidf_public_firm_5##c.tfidf_public_firm_1##c.tfidf_startup_5##c.tfidf_startup_1 if k != `k'
			predict xxx
			replace pred_early_stage_tfidf = xxx if k == `k'

			* Industry indicators only. Where predict returns missing, fall back to
			* the outcome mean of the training folds, so that no information from
			* the held-out fold enters its own prediction.
			capture drop xxx
			logit gets_early_stage i.hp_industry if k != `k'
			predict xxx
			sum gets_early_stage if k != `k'
			replace xxx = `r(mean)' if xxx == .
			replace pred_early_stage_hp_ind = xxx if k == `k'

			* Industry indicators plus word-embedding scores, same fallback.
			capture drop xxx
			logit gets_early_stage i.hp_industry c.nov_startup_5##c.nov_startup_1##c.nov_public_firm_5##c.nov_public_firm_1 if k != `k'
			predict xxx
			sum gets_early_stage if k != `k'
			replace xxx = `r(mean)' if xxx == .
			replace pred_early_stage_all = xxx if k == `k'
		}

		di "	Equity Growth Models"
		quietly {
			* Word-embedding differentiation scores only.
			logit growth c.nov_startup_5##c.nov_startup_1##c.nov_public_firm_5##c.nov_public_firm_1 if k != `k'
			predict yyy
			replace pred_growth = yyy if k == `k'

			* TF-IDF differentiation scores only.
			capture drop yyy
			logit growth c.tfidf_public_firm_5##c.tfidf_public_firm_1##c.tfidf_startup_5##c.tfidf_startup_1 if k != `k'
			predict yyy
			replace pred_growth_tfidf = yyy if k == `k'

			* Industry indicators only, training-fold mean as fallback.
			capture drop yyy
			logit growth i.hp_industry if k != `k'
			predict yyy
			sum growth if k != `k'
			replace yyy = `r(mean)' if yyy == .
			replace pred_growth_hp_ind = yyy if k == `k'

			* Industry indicators plus word-embedding scores, same fallback.
			capture drop yyy
			logit growth i.hp_industry c.nov_startup_5##c.nov_startup_1##c.nov_public_firm_5##c.nov_public_firm_1 if k != `k'
			predict yyy
			sum growth if k != `k'
			replace yyy = `r(mean)' if yyy == .
			replace pred_growth_all = yyy if k == `k'
		}
	}

	* Persist the predictions. The helper variables rsort, k, xxx and yyy are
	* saved along with them, exactly as before.
	save analysis.dta , replace

}
	






* For every prediction variable: blank out observations with year of 2017 or later
* (missing year counts as later in Stata comparisons), and set still-missing
* predictions from earlier years to zero.
* NOTE(review): these changes are made in memory only and are not saved here;
* confirm they are meant to survive the reload of analysis.dta that follows.
foreach score of varlist pred_growth* pred_early_stage* {
	replace `score' = . if year >= 2017
	replace `score' = 0 if year < 2017 & `score' == .
}




* Out-of-sample performance figure for the word-embeddings model.
* Panels A and C: mean outcome within 20 equal-count bins of the predicted score.
* Panels B and D: ROC curves of the outcome against the predicted score.
*
* The data are reloaded with the clear option. The use command has no replace
* option, so the previous form stopped the do-file with an error.
use analysis.dta, clear
drop if pred_early_stage == . | pred_growth == .

* bin takes the values 0, .05, ..., .95 by rank of the sorted early-stage score.
sort pred_early_stage
capture drop bin
gen bin =  floor((_n-1)/_N*20)/20
graph bar (mean) gets_early_stage, over(bin) saving(a.gph, replace)  title("A. Gets Early Stage") ytitle("Share of Firms that Get Early Stage") b1title("Percentile of Diff. Score")
roctab gets_early_stage pred_early_stage , graph saving(b.gph, replace) title("B. Gets Early Stage")


* Same construction, ranked by the growth score.
sort pred_growth
capture drop bin
gen bin =  floor((_n-1)/_N*20)/20
graph bar (mean) growth, over(bin)  saving(c.gph, replace) title("C. Equity Growth (IPO or Acq.)") ytitle("Share of Firms that Achieve Growth") b1title("Percentile of Diff. Score")
roctab growth pred_growth , graph saving(d.gph, replace) title("D. Growth (IPO or Acq.)")

* Assemble the four panels and export the figure.
graph combine a.gph b.gph c.gph d.gph, cols(2) title("{bf:Word Embeddings Model}", size(medium)) xsize(8) ysize(5) iscale(.44)
graph export "../../tex2/out_of_sample_performance.png" , as(png) width(800) height(700) replace


* Out-of-sample performance figure for the TF-IDF model, built on a fresh copy
* of analysis.dta. Panels A and C show the mean outcome by 20 rank bins of the
* predicted score; panels B and D show the matching ROC curves.
use analysis.dta, clear

* --- Early stage: rank by the TF-IDF early-stage score ---
sort pred_early_stage_tfidf
capture drop bin
generate bin = floor((_n-1)/_N*20)/20
graph bar (mean) gets_early_stage, over(bin) ///
	saving(a.gph, replace) ///
	title("A. Gets Early Stage") ///
	ytitle("Share of Firms that Get Early Stage") ///
	b1title("Percentile of Diff. Score")
roctab gets_early_stage pred_early_stage_tfidf, graph ///
	saving(b.gph, replace) ///
	title("B. Gets Early Stage")

* --- Growth: rank by the TF-IDF growth score ---
sort pred_growth_tfidf
capture drop bin
generate bin = floor((_n-1)/_N*20)/20
graph bar (mean) growth, over(bin) ///
	saving(c.gph, replace) ///
	title("C. Growth(IPO or Acq.)") ///
	ytitle("Share of Firms that Achieve Growth") ///
	b1title("Percentile of Diff. Score")
roctab growth pred_growth_tfidf, graph ///
	saving(d.gph, replace) ///
	title("D. Growth (IPO or Acq.)")

* --- Combine the four panels and export ---
graph combine a.gph b.gph c.gph d.gph, cols(2) ///
	title("{bf:TF-IDF Model}") ///
	xsize(8) ysize(5) iscale(.44)
graph export "../../tex2/out_of_sample_performance_tfidf.png", ///
	as(png) width(800) height(800) replace

	



* Out-of-sample performance figure for the models that combine hp_industry
* indicators with the word-embedding scores. Uses the data already in memory.
* Panels A and C show the mean outcome by 20 rank bins of the predicted score;
* panels B and D show the matching ROC curves.

* --- Early stage: rank by the combined-model early-stage score ---
sort pred_early_stage_all
capture drop bin
generate bin = floor((_n-1)/_N*20)/20
graph bar (mean) gets_early_stage, over(bin) ///
	saving(a.gph, replace) ///
	title("A. Gets Early Stage") ///
	ytitle("Share of Firms that Get Early Stage") ///
	b1title("Percentile of Diff. Score")
roctab gets_early_stage pred_early_stage_all, graph ///
	saving(b.gph, replace) ///
	title("B. Gets Early Stage")

* --- Growth: rank by the combined-model growth score ---
sort pred_growth_all
capture drop bin
generate bin = floor((_n-1)/_N*20)/20
graph bar (mean) growth, over(bin) ///
	saving(c.gph, replace) ///
	title("C. Growth(IPO or Acq.)") ///
	ytitle("Share of Firms that Achieve Growth") ///
	b1title("Percentile of Diff. Score")
roctab growth pred_growth_all, graph ///
	saving(d.gph, replace) ///
	title("D. Growth (IPO or Acq.)")

* --- Combine the four panels and export ---
graph combine a.gph b.gph c.gph d.gph, cols(2) ///
	title("{bf:HP Industry Fixed Effects and Word Embeddings}") ///
	xsize(8) ysize(5) iscale(.44)
graph export "../../tex2/out_of_sample_performance_all.png", ///
	as(png) width(800) height(800) replace




* Binned scatterplots of early-stage financing against nov_public_firm_5.
* Works on a fresh copy of analysis.dta with one row kept per website.
set scheme s1mono
use analysis.dta, clear
duplicates drop website, force

* Panel A: log_early_stage, controlling for the _Y variables.
binscatter log_early_stage nov_public_firm_5, ///
	control(_Y*) ///
	title("A. {bf:Unconditional Correlation}" "{it:Fixed Effects:}  Founding Year") ///
	ytitle("Log(Early Stage +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(a.gph, replace)

* Panel B: log_early_stage, absorbing year_ind.
binscatter log_early_stage nov_public_firm_5, ///
	absorb(year_ind) ///
	title("B. {bf:Main Relationship}"  "{it:Fixed Effects:}" "Founding Year & HP Industry.") ///
	ytitle("Log(Early Stage +1)") ///
	xtitle(" Differentiation Score(5 Closest Public Firms)") ///
	saving(b.gph, replace)

* Panel C: as panel B, additionally controlling for the _wlq variables.
binscatter log_early_stage nov_public_firm_5, ///
	absorb(year_ind) control(_wlq*) ///
	title("C. {bf:Additional Controls}" "{it:Fixed Effects:}  Founding Year, HP Industry," "and Website Length Bin.") ///
	ytitle("Log(Early Stage +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(c.gph, replace)

* Panel D: the gets_early_stage indicator, absorbing year_ind.
binscatter gets_early_stage nov_public_firm_5, ///
	absorb(year_ind) ///
	title("D. {bf:Extensive Margin}"  "{it:Fixed Effects:}" "Founding Year & HP Industry.") ///
	ytitle("1[Gets Early Stage Financing]") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(d.gph, replace)

* Combine the four panels and export.
graph combine a.gph b.gph c.gph d.gph, cols(2) iscale(.45) ///
	title("Binned Scatterplots", size(medsmall))

graph export "../../tex2/binscatters_early_stage.png", ///
	as(png) width(800) height(800) replace




* Binned scatterplots of Series A and seed outcomes against nov_public_firm_5.
* Works on a fresh copy of analysis.dta with one row kept per website.
set scheme s1mono
use analysis.dta, clear
duplicates drop website, force

* Panel a: log_up_to_a, absorbing year.
binscatter log_up_to_a nov_public_firm_5, ///
	absorb(year) ///
	title(  "{it:Fixed Effects:}" "Founding Year") ///
	ytitle("Log(Early Stage + Series A +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(a.gph, replace)

* Panel b: log_up_to_a, absorbing year_ind.
binscatter log_up_to_a nov_public_firm_5, ///
	absorb(year_ind) ///
	title(  "{it:Fixed Effects:}" "Founding Year & HP Industry") ///
	ytitle("Log(Early Stage + Series A +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(b.gph, replace)

* Panel c: log_seed, absorbing year_ind.
binscatter log_seed nov_public_firm_5, ///
	absorb(year_ind) ///
	title(  "{it:Fixed Effects:}" "Founding Year &  HP Industry" ) ///
	ytitle("Log(Seed +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(c.gph, replace)

* Panel d: log_series_a, restricted to observations with gets_early_stage equal to 0.
binscatter log_series_a nov_public_firm_5 if gets_early_stage == 0, ///
	absorb(year_ind) ///
	title(  "{it:Fixed Effects:}" "Founding Year  & HP Industry" "{it:Subsample:}" "Firms with No Early Stage") ///
	ytitle("Log(Series A +1)") ///
	xtitle(" Differentiation Score (5 Closest Public Firms)") ///
	saving(d.gph, replace)

* Combine the four panels and export.
graph combine a.gph b.gph c.gph d.gph, cols(2) iscale(.5) ///
	title("Binned Scatterplots of Differentiation Score" "and Series A or Seed Financing", size(medium))

graph export "../../tex2/binscatters_series_a.png", ///
	as(png) width(1200) height(800) replace


* Binned scatterplots of log_early_stage against the other differentiation
* scores, each absorbing year_ind. Graphs are held in memory under the names
* a, b, c and d rather than saved to disk.

* Average of the public-firm and startup top-5 scores.
generate nov_5 = (nov_public_firm_5 + nov_startup_5)/2

binscatter log_early_stage nov_5, ///
	absorb(year_ind) name(a, replace) ///
	ytitle("Log(Early Stage Financing+1)") ///
	xtitle("Strategic Differentiation (Top 5 Startups and Public Firms)") ///
	title(  "{it:Fixed Effects:}" "Founding Year & HP Industry" )

binscatter log_early_stage nov_startup_5, ///
	absorb(year_ind) name(b, replace) ///
	ytitle("Log(Early Stage Financing+1)") ///
	xtitle("Strategic Differentiation (Top 5 Startups)") ///
	title(  "{it:Fixed Effects:}" "Founding Year & HP Industry" )

binscatter log_early_stage nov_public_firm_1, ///
	absorb(year_ind) name(c, replace) ///
	ytitle("Log(Early Stage Financing+1)") ///
	xtitle("Strategic Differentiation (Top 1 Public Firm)") ///
	title(  "{it:Fixed Effects:}" "Founding Year & HP Industry" )

binscatter log_early_stage nov_startup_1, ///
	absorb(year_ind) name(d, replace) ///
	ytitle("Log(Early Stage Financing+1)") ///
	xtitle("Strategic Differentiation (Top 1 Startups)") ///
	title(  "{it:Fixed Effects:}" "Founding Year & HP Industry" )

* Combine the four in-memory graphs and export.
graph combine a b c d, ///
	title("Binned Scatterplots of Other  Differentiation Scores" "and Early Stage Financing", size(medium)) ///
	iscale(.5)
graph export "../../tex2/binscatters_early_stage_other.png", ///
	as(png) width(1200) height(1000) replace




	* Binned scatterplots of growth against nov_public_firm_5, using the data
	* already in memory. Three panels are produced and combined on a common y axis.

	* Panel A: controlling for the _Y variables.
	binscatter growth nov_public_firm_5, ///
		control(_Y*) ///
		title("A. {bf:Unconditional Correlation}" "{it:Fixed Effects:} Founding Year" ) ///
		ytitle("Growth") ///
		xtitle("Differentiation Score (5 Closest Public Firms)") ///
		saving(a.gph, replace)

	* Panel B: absorbing year_ind.
	binscatter growth nov_public_firm_5, ///
		absorb(year_ind) ///
		title("B. {bf:Main Relationship}"  "{it:Fixed Effects:}" "Founding Year & HP Industry.") ///
		ytitle("Growth") ///
		xtitle("Differentiation Score (5 Closest Public Firms)") ///
		saving(b.gph, replace)

	* Panel C: absorbing year_ind and controlling for the _wlq variables.
	binscatter growth nov_public_firm_5, ///
		absorb(year_ind) control(_wlq*) ///
		title("C. {bf:Additional Controls}" "{it:Fixed Effects:}" "Founding Year & HP Industry," "and Website Length Bin.") ///
		ytitle("Growth") ///
		xtitle("Differentiation Score (5 Closest Public Firms)") ///
		saving(c.gph, replace)

	* Combine the three panels and export.
	graph combine a.gph b.gph c.gph, cols(2) iscale(.5) ///
		title("Equity Outcomes and Differentiation Score" "Binned Scatterplots", size(medsmall)) ///
		ycommon

	graph export "../../tex2/binscatters_growth.png", ///
		as(png) width(800) height(800) replace


